Task 1

getwd()
## [1] "C:/Users/nico/Documents/MATH 4753/Labs/Lab 8"

Task 2

a = 0
b = 5
runif(10,a,b)
##  [1] 2.4939054 1.5850592 2.3393595 2.7596144 3.4628957 3.0234064 2.2083166
##  [8] 4.3593284 4.4497258 0.7714912
sample = c(1.3203239, 4.8155007, 0.6193721, 0.5776914, 4.7227894, 4.1488387, 3.9591730,
           1.7561037, 3.436954, 3.6542493)
cat("sample:", sample, "\n")
## sample: 1.320324 4.815501 0.6193721 0.5776914 4.722789 4.148839 3.959173 1.756104 3.436954 3.654249
mu = (a+b)/2
sigmasq = ((b-a)^2)/12
sigma = sqrt(sigmasq)
cat("population mean:", mu, "\n")
## population mean: 2.5
cat("population variance:", sigmasq, "\n")
## population variance: 2.083333
xbar = mean(sample)
ssq = var(sample)
cat("sample mean:", xbar, "\n")
## sample mean: 2.9011
cat("sample variance:", ssq, "\n")
## sample variance: 2.769845

With this particular random sample, my sample mean was higher than the population mean by about 0.4 and my sample variance was higher than the population variance by about 0.7

E_T = 10*mu
V_T = 10*sigmasq
E_x = mu
V_x = sigmasq/10
cat("sum mean:", E_T, "\n")
## sum mean: 25
cat("sum variance:", V_T, "\n")
## sum variance: 20.83333
cat("mean mean:", E_x, "\n")
## mean mean: 2.5
cat("mean variance:",V_x, "\n")
## mean variance: 0.2083333
myclt=function(n,iter){
y=runif(n*iter,0,5) # A
data=matrix(y,nr=n,nc=iter,byrow=TRUE) #B
sm=apply(data,2,sum) #C
hist(sm)
sm
}
w=myclt(n=10,iter=10000) #D

mean(w)
## [1] 24.99666
var(w)
## [1] 21.23231

Line A creates y, a random uniform distribution of n*iter variables, with a minimum of 0 and a maximum of 5. Line B creates data, a matrix from the data in y with n rows and iter columns. Line C creates sm, the sum of the numbers in the columns of the data matrix. Line D uses the function myclt with n=10 and iter=10000

myclt=function(n,iter){
y=runif(n*iter,0,5) # A
data=matrix(y,nr=n,nc=iter,byrow=TRUE) #B
sm=apply(data,2,sum) #C
hist(sm/n)
sm/n
}
w=myclt(n=10,iter=10000) #D

mean(w)
## [1] 2.501533
var(w)
## [1] 0.2107113

Task 3

mycltu=function(n,iter,a=0,b=10){
## r-random sample from the uniform
y=runif(n*iter,a,b)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
hist(w,freq=FALSE,  ylim=c(0,ymax), main=paste("Histogram of sample mean",
"\n", "sample size= ",n,sep=""),xlab="Sample mean")
## add a density curve made from the sample distribution
lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
## Add the density from which the samples were taken
curve(dunif(x,a,b),add=TRUE,lwd=4)
}
mycltu(n=20,iter=100000)

  • w=apply(data,2,mean), how does the apply function use the 2? THe apply function uses the 2 to to find the mean of the columns (since the order is rows, columns) of the matrix

  • How many terms are in w, when mycltu(n=20,iter=100000) is called? w should have 100000 terms since the matrix has 100000 columns

  • curve(dnorm(x,mean=(a+b)/2 creates a normal curve for the data

  • sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3): Explain why sd takes the formula as shown in the function. Because of the formula for variance of means for a uniform distribution.

mycltu(n=1, iter=10000, a=0,b=10)

mycltu(n=2, iter=10000, a=0,b=10)

mycltu(n=3,iter=10000,a=0,b=10)

mycltu(n=5,iter=10000,a=0,b=10)

mycltu(n=10, iter=10000,a=0,b=10)

mycltu(n=30,iter=10000,a=0,b=10)

As n increases, the distribution begins to look more like a normal distribution.

Task 4

mycltb=function(n,iter,p=0.5,...){

## r-random sample from the Binomial
y=rbinom(n*iter,size=n,prob=p)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax

## Now we can make the histogram
## freq=FALSE means take a density
hist(w,freq=FALSE,  ylim=c(0,ymax),
main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3) 

}

mycltb(n=4,iter=10000,p=0.3)

mycltb(n=5,iter=10000,p=0.3)

mycltb(n=10,iter=10000,p=0.3)

mycltb(n=20,iter=10000,p=0.3)

mycltb(n=4,iter=10000,p=0.7)

mycltb(n=5,iter=10000,p=0.7)

mycltb(n=10,iter=10000,p=0.7)

mycltb(n=20,iter=10000,p=0.7)

mycltb(n=4,iter=10000,p=0.5)

mycltb(n=5,iter=10000,p=0.5)

mycltb(n=10,iter=10000,p=0.5)

mycltb(n=20,iter=10000,p=0.5)

I am not sure what to conclude, other than than the distributions seem consistent even if p changes.

Task 5

mycltp=function(n,iter,lambda=10,...){

## r-random sample from the Poisson
y=rpois(n*iter,lambda=lambda)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax

## Make a suitable layout for graphing
layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE))

## Now we can make the histogram
hist(w,freq=FALSE,  ylim=c(0,ymax), col=rainbow(max(w)),
main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve

# Now make a new plot
# Since y is discrete we should use a barplot
barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" )
x=0:max(y)
plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)),
main="Probability function for Poisson", ylab="Probability",xlab="y")
}

mycltp(n=2, iter=10000,lambda=4)

mycltp(n=3,iter=10000,lambda=4)

mycltp(n=5,iter=10000,lambda=4)

mycltp(n=10,iter=10000,lambda=4)

mycltp(n=20,iter=10000,lambda=4)

mycltp(n=2, iter=10000,lambda=10)

mycltp(n=3,iter=10000,lambda=10)

mycltp(n=5,iter=10000,lambda=10)

mycltp(n=10,iter=10000,lambda=10)

mycltp(n=20,iter=10000,lambda=10)

Task video

Task 6

SPRING244753wise0073::myclt(1,2)

## [1] 4.204648 4.381574

Task 7 (Optional)